From 9f6b2232e170261fac275db18870212fa5b01a9f Mon Sep 17 00:00:00 2001 From: "kaf24@camelot.eng.3leafnetworks.com" Date: Fri, 3 Sep 2004 22:26:02 +0000 Subject: [PATCH] bitkeeper revision 1.1159.71.1 (4138ef7arWNBBMfQRCJaRyf93H0eqw) Remove L1 PT pinning from Xen. Clean up the fixes for writable page tables. --- BitKeeper/etc/logging_ok | 1 + .../include/asm-xen/pgalloc.h | 2 - .../arch/xen/i386/mm/hypervisor.c | 50 +----------- .../include/asm-xen/hypervisor.h | 12 --- tools/libxc/xc_linux_build.c | 2 +- tools/libxc/xc_linux_restore.c | 30 +++---- tools/libxc/xc_netbsd_build.c | 2 +- xen/arch/x86/memory.c | 45 +++++------ xen/common/schedule.c | 14 ---- xen/include/asm-x86/mm.h | 81 ++----------------- xen/include/hypervisor-ifs/hypervisor-if.h | 11 +-- 11 files changed, 42 insertions(+), 208 deletions(-) diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok index 35d2bada8d..9332c02f2b 100644 --- a/BitKeeper/etc/logging_ok +++ b/BitKeeper/etc/logging_ok @@ -19,6 +19,7 @@ iap10@striker.cl.cam.ac.uk iap10@tetris.cl.cam.ac.uk jws22@gauntlet.cl.cam.ac.uk jws@cairnwell.research +kaf24@camelot.eng.3leafnetworks.com kaf24@freefall.cl.cam.ac.uk kaf24@labyrinth.cl.cam.ac.uk kaf24@penguin.local diff --git a/linux-2.4.27-xen-sparse/include/asm-xen/pgalloc.h b/linux-2.4.27-xen-sparse/include/asm-xen/pgalloc.h index f6bee4d689..be678385f3 100644 --- a/linux-2.4.27-xen-sparse/include/asm-xen/pgalloc.h +++ b/linux-2.4.27-xen-sparse/include/asm-xen/pgalloc.h @@ -134,7 +134,6 @@ static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) { clear_page(pte); __make_page_readonly(pte); - queue_pte_pin(__pa(pte)); } return pte; @@ -153,7 +152,6 @@ static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, static __inline__ void pte_free_slow(pte_t *pte) { - queue_pte_unpin(__pa(pte)); __make_page_writable(pte); free_page((unsigned long)pte); } diff --git a/linux-2.6.8.1-xen-sparse/arch/xen/i386/mm/hypervisor.c b/linux-2.6.8.1-xen-sparse/arch/xen/i386/mm/hypervisor.c index a57eabcd8c..03158c3ee4 100644 --- a/linux-2.6.8.1-xen-sparse/arch/xen/i386/mm/hypervisor.c +++ b/linux-2.6.8.1-xen-sparse/arch/xen/i386/mm/hypervisor.c @@ -85,8 +85,6 @@ static void DEBUG_disallow_pt_read(unsigned long va) #undef queue_invlpg #undef queue_pgd_pin #undef queue_pgd_unpin -#undef queue_pte_pin -#undef queue_pte_unpin #undef queue_set_ldt #endif @@ -219,7 +217,7 @@ void queue_pgd_pin(unsigned long ptr) spin_lock_irqsave(&update_lock, flags); update_queue[idx].ptr = phys_to_machine(ptr); update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; - update_queue[idx].val = MMUEXT_PIN_L2_TABLE; + update_queue[idx].val = MMUEXT_PIN_TABLE; increment_index(); spin_unlock_irqrestore(&update_lock, flags); } @@ -235,28 +233,6 @@ void queue_pgd_unpin(unsigned long ptr) spin_unlock_irqrestore(&update_lock, flags); } -void queue_pte_pin(unsigned long ptr) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = phys_to_machine(ptr); - update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; - update_queue[idx].val = MMUEXT_PIN_L1_TABLE; - increment_index(); - spin_unlock_irqrestore(&update_lock, flags); -} - -void queue_pte_unpin(unsigned long ptr) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = phys_to_machine(ptr); - update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; - update_queue[idx].val = MMUEXT_UNPIN_TABLE; - increment_index(); - spin_unlock_irqrestore(&update_lock, flags); -} - void queue_set_ldt(unsigned long ptr, unsigned long len) { unsigned long flags; @@ -339,7 +315,7 @@ void xen_pgd_pin(unsigned long ptr) spin_lock_irqsave(&update_lock, flags); update_queue[idx].ptr = phys_to_machine(ptr); update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; - update_queue[idx].val = MMUEXT_PIN_L2_TABLE; + update_queue[idx].val = MMUEXT_PIN_TABLE; increment_index_and_flush(); spin_unlock_irqrestore(&update_lock, flags); } @@ -355,28 +331,6 @@ void xen_pgd_unpin(unsigned long ptr) spin_unlock_irqrestore(&update_lock, flags); } -void xen_pte_pin(unsigned long ptr) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = phys_to_machine(ptr); - update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; - update_queue[idx].val = MMUEXT_PIN_L1_TABLE; - increment_index_and_flush(); - spin_unlock_irqrestore(&update_lock, flags); -} - -void xen_pte_unpin(unsigned long ptr) -{ - unsigned long flags; - spin_lock_irqsave(&update_lock, flags); - update_queue[idx].ptr = phys_to_machine(ptr); - update_queue[idx].ptr |= MMU_EXTENDED_COMMAND; - update_queue[idx].val = MMUEXT_UNPIN_TABLE; - increment_index_and_flush(); - spin_unlock_irqrestore(&update_lock, flags); -} - void xen_set_ldt(unsigned long ptr, unsigned long len) { unsigned long flags; diff --git a/linux-2.6.8.1-xen-sparse/include/asm-xen/hypervisor.h b/linux-2.6.8.1-xen-sparse/include/asm-xen/hypervisor.h index 4d7ddc55a7..0de4075b28 100644 --- a/linux-2.6.8.1-xen-sparse/include/asm-xen/hypervisor.h +++ b/linux-2.6.8.1-xen-sparse/include/asm-xen/hypervisor.h @@ -54,8 +54,6 @@ void queue_tlb_flush(void); void queue_invlpg(unsigned long ptr); void queue_pgd_pin(unsigned long ptr); void queue_pgd_unpin(unsigned long ptr); -void queue_pte_pin(unsigned long ptr); -void queue_pte_unpin(unsigned long ptr); void queue_set_ldt(unsigned long ptr, unsigned long bytes); void queue_machphys_update(unsigned long mfn, unsigned long pfn); void xen_l1_entry_update(pte_t *ptr, unsigned long val); @@ -65,8 +63,6 @@ void xen_tlb_flush(void); void xen_invlpg(unsigned long ptr); void xen_pgd_pin(unsigned long ptr); void xen_pgd_unpin(unsigned long ptr); -void xen_pte_pin(unsigned long ptr); -void xen_pte_unpin(unsigned long ptr); void xen_set_ldt(unsigned long ptr, unsigned long bytes); void xen_machphys_update(unsigned long mfn, unsigned long pfn); #define MMU_UPDATE_DEBUG 0 @@ -141,14 +137,6 @@ extern page_update_debug_t update_debug_queue[]; printk("PGD UNPIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ queue_pgd_unpin(_p); \ }) -#define queue_pte_pin(_p) ({ \ - printk("PTE PIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ - queue_pte_pin(_p); \ -}) -#define queue_pte_unpin(_p) ({ \ - printk("PTE UNPIN %s %d: %08lx\n", __FILE__, __LINE__, (_p)); \ - queue_pte_unpin(_p); \ -}) #define queue_set_ldt(_p,_l) ({ \ printk("SETL LDT %s %d: %08lx %d\n", __FILE__, __LINE__, (_p), (_l)); \ queue_set_ldt((_p), (_l)); \ diff --git a/tools/libxc/xc_linux_build.c b/tools/libxc/xc_linux_build.c index 41569b8057..1242334dad 100644 --- a/tools/libxc/xc_linux_build.c +++ b/tools/libxc/xc_linux_build.c @@ -270,7 +270,7 @@ static int setup_guestos(int xc_handle, * correct protection for the page */ if ( add_mmu_update(xc_handle, mmu, - l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) ) + l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_TABLE) ) goto error_out; start_info = map_pfn_writeable( diff --git a/tools/libxc/xc_linux_restore.c b/tools/libxc/xc_linux_restore.c index fe7522e1ed..5e4739f84b 100644 --- a/tools/libxc/xc_linux_restore.c +++ b/tools/libxc/xc_linux_restore.c @@ -473,28 +473,16 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt) */ for ( i = 0; i < nr_pfns; i++ ) { - if ( pfn_type[i] == (L1TAB|LPINTAB) ) - { - if ( add_mmu_update(xc_handle, mmu, - (pfn_to_mfn_table[i]<> 2 )) ) + ((unsigned long)pl2e & + ~PAGE_MASK) >> 2)) ) return 0; if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) ) @@ -826,21 +827,9 @@ static int do_extended_command(unsigned long ptr, unsigned long val) switch ( cmd ) { - case MMUEXT_PIN_L1_TABLE: - case MMUEXT_PIN_L2_TABLE: - - /* When we pin an L1 page we now insist that the va - backpointer (used for writable page tables) must still be - mutable. This is an additional restriction even for guests - that don't use writable page tables, but I don't think it - will break anything as guests typically pin pages before - they are used, hence they'll still be mutable. */ - + case MMUEXT_PIN_TABLE: okay = get_page_and_type_from_pagenr( - pfn, - ((cmd==MMUEXT_PIN_L2_TABLE) ? - PGT_l2_page_table : (PGT_l1_page_table | PGT_va_mutable) ) , - FOREIGNDOM); + pfn, PGT_l2_page_table, FOREIGNDOM); if ( unlikely(!okay) ) { @@ -1183,6 +1172,7 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) unsigned long prev_spfn = 0; l1_pgentry_t *prev_spl1e = 0; struct domain *d = current; + u32 type_info; perfc_incrc(calls_to_mmu_update); perfc_addc(num_page_updates, count); @@ -1231,10 +1221,11 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) } page = &frame_table[pfn]; - switch ( (page->u.inuse.type_info & PGT_type_mask) ) + switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask ) { case PGT_l1_page_table: - if ( likely(passive_get_page_type(page, PGT_l1_page_table)) ) + if ( likely(get_page_type( + page, type_info & (PGT_type_mask|PGT_va_mask))) ) { okay = mod_l1_entry((l1_pgentry_t *)va, mk_l1_pgentry(req.val)); @@ -1484,11 +1475,11 @@ void ptwr_reconnect_disconnected(unsigned long addr) [ptwr_info[cpu].writable_l1>>PAGE_SHIFT]; #ifdef PTWR_TRACK_DOMAIN - if (ptwr_domain[cpu] != get_current()->domain) + if (ptwr_domain[cpu] != current->domain) printk("ptwr_reconnect_disconnected domain mismatch %d != %d\n", - ptwr_domain[cpu], get_current()->domain); + ptwr_domain[cpu], current->domain); #endif - PTWR_PRINTK(("[A] page fault in disconnected space: addr %08lx space %08lx\n", + PTWR_PRINTK(("[A] page fault in disconn space: addr %08lx space %08lx\n", addr, ptwr_info[cpu].disconnected << L2_PAGETABLE_SHIFT)); pl2e = &linear_l2_table[ptwr_info[cpu].disconnected]; @@ -1560,9 +1551,9 @@ void ptwr_flush_inactive(void) int i, idx; #ifdef PTWR_TRACK_DOMAIN - if (ptwr_info[cpu].domain != get_current()->domain) + if (ptwr_info[cpu].domain != current->domain) printk("ptwr_flush_inactive domain mismatch %d != %d\n", - ptwr_info[cpu].domain, get_current()->domain); + ptwr_info[cpu].domain, current->domain); #endif #if 0 { @@ -1643,9 +1634,9 @@ int ptwr_do_page_fault(unsigned long addr) if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l1_page_table ) { #ifdef PTWR_TRACK_DOMAIN - if ( ptwr_info[cpu].domain != get_current()->domain ) + if ( ptwr_info[cpu].domain != current->domain ) printk("ptwr_do_page_fault domain mismatch %d != %d\n", - ptwr_info[cpu].domain, get_current()->domain); + ptwr_info[cpu].domain, current->domain); #endif pl2e = &linear_l2_table[(page->u.inuse.type_info & PGT_va_mask) >> PGT_va_shift]; diff --git a/xen/common/schedule.c b/xen/common/schedule.c index cc06d3c085..a986ee06a1 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -374,20 +374,6 @@ void __enter_scheduler(void) cleanup_writable_pagetable( prev, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE); -#ifdef PTWR_TRACK_DOMAIN - { - extern domid_t ptwr_domain[]; - int cpu = smp_processor_id(); - if (ptwr_domain[cpu] != prev->domain) - printk("switch_to domain mismatch %d != %d\n", - ptwr_domain[cpu], prev->domain); - ptwr_domain[cpu] = next->domain; - if (ptwr_disconnected[cpu] != ENTRIES_PER_L2_PAGETABLE || - ptwr_writable_idx[cpu]) - printk("switch_to ptwr dirty!!!\n"); - } -#endif - perfc_incrc(sched_ctx); #if defined(WAKE_HISTO) diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 05813d64b7..b60e2e5f42 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -71,10 +71,9 @@ struct pfn_info /* Has this page been validated for use as its current type? */ #define _PGT_validated 28 #define PGT_validated (1<<_PGT_validated) - /* 10-bit most significant bits of va address if used as l1 page table */ + /* The 10 most significant bits of virt address if this is a L1 page table. */ #define PGT_va_shift 18 #define PGT_va_mask (((1<<10)-1)<count_info)) ) - { - /* if the page is pinned, but we're dropping the last reference - then make the va backpointer mutable again */ - nx |= PGT_va_mutable; - } } while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); } @@ -230,27 +222,15 @@ static inline int get_page_type(struct pfn_info *page, u32 type) nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); nx |= type; /* No extra validation needed for writable pages. */ - if ( (type & PGT_type_mask) == PGT_writable_page ) + if ( type == PGT_writable_page ) nx |= PGT_validated; } } - else if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) + else if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) ) { - DPRINTK("Unexpected type (saw %08x != exp %08x) for pfn %08lx\n", - x & PGT_type_mask, type, page_to_pfn(page)); - return 0; - } - else if ( (x & PGT_va_mask) == PGT_va_mutable ) - { - /* The va_backpointer is currently mutable, hence we update it. */ - nx &= ~PGT_va_mask; - nx |= type; /* we know the actual type is correct */ - } - else if ( unlikely((x & PGT_va_mask) != (type & PGT_va_mask) ) ) - { - /* The va backpointer wasn't mutable, and is different :-( */ - DPRINTK("Unexpected va backpointer (saw %08x != exp %08x) for pfn %08lx\n", - x, type, page_to_pfn(page)); + DPRINTK("Unexpected type or va backptr (saw %08x != exp %08x) " + "for pfn %08lx\n", + x & (PGT_type_mask|PGT_va_mask), type, page_to_pfn(page)); return 0; } else if ( unlikely(!(x & PGT_validated)) ) @@ -286,55 +266,6 @@ static inline int get_page_type(struct pfn_info *page, u32 type) return 1; } -/* This 'passive' version of get_page_type doesn't attempt to validate -the page, but just checks the type and increments the type count. The -function is called while doing a NORMAL_PT_UPDATE of an entry in an L1 -page table: We want to 'lock' the page for the brief beriod while -we're doing the update, but we're not actually linking it in to a -pagetable. */ - -static inline int passive_get_page_type(struct pfn_info *page, u32 type) -{ - u32 nx, x, y = page->u.inuse.type_info; - again: - do { - x = y; - nx = x + 1; - if ( unlikely((nx & PGT_count_mask) == 0) ) - { - DPRINTK("Type count overflow on pfn %08lx\n", page_to_pfn(page)); - return 0; - } - else if ( unlikely((x & PGT_count_mask) == 0) ) - { - if ( (x & (PGT_type_mask|PGT_va_mask)) != type ) - { - nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); - nx |= type; - } - } - else if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) - { - DPRINTK("Unexpected type (saw %08x != exp %08x) for pfn %08lx\n", - x & PGT_type_mask, type, page_to_pfn(page)); - return 0; - } - else if ( unlikely(!(x & PGT_validated)) ) - { - /* Someone else is updating validation of this page. Wait... */ - while ( (y = page->u.inuse.type_info) != x ) - { - rep_nop(); - barrier(); - } - goto again; - } - } - while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); - - return 1; -} - static inline void put_page_and_type(struct pfn_info *page) { diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index 3a0189a655..36528bfda4 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -104,9 +104,9 @@ * ptr[1:0] == MMU_EXTENDED_COMMAND: * val[7:0] -- MMUEXT_* command. * - * val[7:0] == MMUEXT_(UN)PIN_*_TABLE: - * ptr[:2] -- Machine address of frame to be (un)pinned as a p.t. page. - * The frame must belong to the FD, if one is specified. + * val[7:0] == MMUEXT_[UN]PIN_TABLE: + * ptr[:2] -- Machine address of frame to be (un)pinned as a top-level p.t. + * page. The frame must belong to the FD, if one is specified. * * val[7:0] == MMUEXT_NEW_BASEPTR: * ptr[:2] -- Machine address of new page-table base to install in MMU. @@ -142,10 +142,7 @@ #define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ #define MMU_MACHPHYS_UPDATE 2 /* ptr = MA of frame to modify entry for */ #define MMU_EXTENDED_COMMAND 3 /* least 8 bits of val demux further */ -#define MMUEXT_PIN_L1_TABLE 0 /* ptr = MA of frame to pin */ -#define MMUEXT_PIN_L2_TABLE 1 /* ptr = MA of frame to pin */ -#define MMUEXT_PIN_L3_TABLE 2 /* ptr = MA of frame to pin */ -#define MMUEXT_PIN_L4_TABLE 3 /* ptr = MA of frame to pin */ +#define MMUEXT_PIN_TABLE 0 /* ptr = MA of frame to pin */ #define MMUEXT_UNPIN_TABLE 4 /* ptr = MA of frame to unpin */ #define MMUEXT_NEW_BASEPTR 5 /* ptr = MA of new pagetable base */ #define MMUEXT_TLB_FLUSH 6 /* ptr = NULL */ -- 2.30.2